*_____________________________________________________________________________________________________________________________________________________
*
**# FILES (including fat files, income & wealth file, biomarkers file if used)
*_____________________________________________________________________________________________________________________________________________________

* Start from an empty workspace and load the RAND HRS file as the master dataset.
* -clear- is the documented option of -use- for discarding the data in memory
* (the previous -replace- is not a documented option of -use-).
clear
use "${hrsdir}\rndhrs_p.dta", clear
* Quick clarification, in case I forget: add text for code 9 to the INCPARTF and INCF value labels
label define INCPARTF 9 "9.no financial respondent", modify 
label define INCF 9 "9.no financial respondent", modify 


// Merging in biomarker data (which is already cleaned for this purpose in the biomarkers.do file in the biomarkers folder)
/* Currently unused; the global in the master .do file is commented out, so none of this code will run
capture merge 1:1 hhidpn using "${hrsbiomarkers}"
if _rc==0 {
	drop if _merge==2 			// Unclear why this one person exists
	drop _merge
	drop s*
/**/ }
*/


// RAND income & wealth file: bring in the veterans' benefit variables (nvet, nfvet, iovet)
// together with isemp / nsdi / nfsdi, which are carried into the reshape further down.
merge 1:1 hhidpn using "${hrsincome}", gen(_mergeinc) ///
	keepusing(r*nvet r*nfvet r*iovet r*isemp r*nsdi r*nfsdi)

// One veterans' benefit status variable per wave (waves 3 to 12)
forvalues w = 3/12 {
	local vet r`w'bennow_vet

	* Start from the reported number of months (nvet); codes 3 and 4 are filled in below
	recode r`w'nvet 													///
		(0    = 0 "0_not claimed") 										///
		(12   = 1 "1_claimed all year") 								///
		(1/11 = 2 "2_sometimes claimed or DK mths") 					///
		(99   = 3 "3_claims all year but not veteran") 					///
		(999  = 4 "4_sometimes claimed or DK mths but not veteran") 	///
		, gen(`vet')
	label variable `vet' "Veteran's benefit status"

	* Months flag set: number of months was DK (RAND then assume months==12), so mark as "sometimes/DK"
	replace `vet' = 2 if r`w'nfvet==1
	* iovet==0 overrides everything above: coded as not claimed
	replace `vet' = 0 if r`w'iovet==0
	* Claimants recorded as non-veterans (ravetrn==0) move from codes 1/2 to codes 3/4
	replace `vet' = `vet' + 2 if inlist(`vet', 1, 2) & ravetrn==0
}
	
	
// Merging in data from RAND fat files (primarily employment data)
/* Checking what is in each
	foreach dataset in h00f1c h02f2c h04f1a h06f2b h08f2a hd10f5c h12f1a h14e1a {
		local var ?c095* ?c096* ?c097* ?c103*
		dis `"capture des `var' using "${hrsfatfiles}\\`dataset'.dta"'
		capture des `var' using "${hrsfatfiles}\\`dataset'.dta"
		if _rc==0	{
			dis _newline(5) "File is `dataset'"
			des `var' using "${hrsfatfiles}\\`dataset'.dta"
		/**/ }
	/**/ }
*/
// One merge per RAND fat file (2000-2014). Each keepusing() list is laid out in the same
// three groups so the files can be compared line by line:
//   (a) employment items, (b) grip strength items (2004 onwards), (c) c095/c096/c097/c103 (2002 onwards)
// Every merge writes its own _merge<year> indicator.

* 2000: numbered variable names, employment items only
merge 1:1 hhidpn using "${hrsfatfiles}\h00f1c.dta", gen(_merge2000) 				///
	keepusing(g3365m1 g3365m2 g3365m3 g3381 g3509 g3519 g3838 g3840 g3841)

* 2002 (prefix h): no grip strength items
merge 1:1 hhidpn using "${hrsfatfiles}\h02f2c.dta", gen(_merge2002) 				///
	keepusing(hj005m1 hj005m2 hj005m3 hj020 hj172 hj179 hj553 hj556 hj557 			///
		hc095 hc096 hc097 hc103)

* 2004 (prefix j)
merge 1:1 hhidpn using "${hrsfatfiles}\h04f1a.dta", gen(_merge2004) 				///
	keepusing(jj005m1 jj005m2 jj005m3 jj020 jj172 jj179 jj553 jj556 jj557 			///
		ji815 ji816 ji853 ji851 ji852 ji817 ji812 									///
		ji813m1 ji813m2 ji813m3 ji813m4 ji813m5 									///
		jc095 jc096 jc097 jc103)

* 2006 (prefix k)
merge 1:1 hhidpn using "${hrsfatfiles}\h06f2b.dta", gen(_merge2006) 				///
	keepusing(kj005m1 kj005m2 kj005m3 kj020 kj172 kj179 kj553 kj556 kj557 			///
		ki815 ki816 ki853 ki851 ki852 ki817 ki812 									///
		ki813m1 ki813m2 ki813m3 ki813m4 ki813m5 									///
		kc095 kc096 kc097 kc103)

* 2008 (prefix l): no li813m5 is requested from this file
merge 1:1 hhidpn using "${hrsfatfiles}\h08f2a.dta", gen(_merge2008) 				///
	keepusing(lj005m1 lj005m2 lj005m3 lj020 lj172 lj179 lj553 lj556 lj557 			///
		li815 li816 li853 li851 li852 li817 li812 									///
		li813m1 li813m2 li813m3 li813m4 											///
		lc095 lc096 lc097 lc103)

* 2010 (prefix m)
merge 1:1 hhidpn using "${hrsfatfiles}\hd10f5c.dta", gen(_merge2010) 				///
	keepusing(mj005m1 mj005m2 mj005m3 mj020 mj172 mj179 mj553 mj556 mj557 			///
		mi815 mi816 mi853 mi851 mi852 mi817 mi812 									///
		mi813m1 mi813m2 mi813m3 mi813m4 mi813m5 									///
		mc095 mc096 mc097 mc103)

* 2012 (prefix n)
merge 1:1 hhidpn using "${hrsfatfiles}\h12f1a.dta", gen(_merge2012) 				///
	keepusing(nj005m1 nj005m2 nj005m3 nj020 nj172 nj179 nj553 nj556 nj557 			///
		ni815 ni816 ni853 ni851 ni852 ni817 ni812 									///
		ni813m1 ni813m2 ni813m3 ni813m4 ni813m5 									///
		nc095 nc096 nc097 nc103)

* 2014 (prefix o)
merge 1:1 hhidpn using "${hrsfatfiles}\h14e1a.dta", gen(_merge2014) 				///
	keepusing(oj005m1 oj005m2 oj005m3 oj020 oj172 oj179 oj553 oj556 oj557 			///
		oi815 oi816 oi853 oi851 oi852 oi817 oi812 									///
		oi813m1 oi813m2 oi813m3 oi813m4 oi813m5 									///
		oc095 oc096 oc097 oc103)
	* Rename the merged fat-file variables to the r<wave><name> pattern used by the reshape.

	* Wave 5 (2000 file): numbered question names, so the mapping is spelled out
	rename (g3365m1  g3365m2  g3365m3  g3381     g3509    g3519    g3838      g3840    g3841) 	///
		   (r5lstat1 r5lstat2 r5lstat3 r5anywork r5hrswk1 r5wksyr1 r5anywork2 r5hrswk2 r5wksyr2)

	* Waves 6-12 (2002-2014 files): the leading letter of the raw name identifies the wave
	*   h=6  j=7  k=8  l=9  m=10  n=11  o=12
	* Raw-name suffixes and their harmonised names are held in parallel lists (same position = same item)
	local emp_raw  "j005m1 j005m2 j005m3 j020 j172 j179 j553 j556 j557"
	local emp_new  "lstat1 lstat2 lstat3 anywork hrswk1 wksyr1 anywork2 hrswk2 wksyr2"
	* Added 26/7/2017 grip strength vars
	local grip_raw "i815 i816 i853 i851 i852 i817 i812 i813m1 i813m2 i813m3 i813m4 i813m5"
	local grip_new "gs_dhand gs_lhand1 gs_lhand2 gs_rhand1 gs_rhand2 gs_effort gs_anytrial gs_reason1 gs_reason2 gs_reason3 gs_reason4 gs_reason5"
	* Added for 2023 OECD working paper
	local oecd_raw "c095 c096 c097 c103"
	local oecd_new "see_screen_HRS hefrnd hepap hehear"

	local w = 6
	foreach pfx in h j k l m n o {

		* Employment items: present for every wave in this loop
		forvalues i = 1/9 {
			local src : word `i' of `emp_raw'
			local dst : word `i' of `emp_new'
			rename `pfx'`src' r`w'`dst'
		}

		* Grip strength items: merged from wave 7 onwards; wave 9 has no fifth reason variable
		if `w' >= 7 {
			local ngrip = cond(`w'==9, 11, 12)
			forvalues i = 1/`ngrip' {
				local src : word `i' of `grip_raw'
				local dst : word `i' of `grip_new'
				rename `pfx'`src' r`w'`dst'
			}
		}

		* c095 / c096 / c097 / c103 items: present for every wave in this loop
		forvalues i = 1/4 {
			local src : word `i' of `oecd_raw'
			local dst : word `i' of `oecd_new'
			rename `pfx'`src' r`w'`dst'
		}

		local ++w
	}
* The merge indicators (_mergeinc, _merge2000 ... _merge2014) are no longer needed
drop _merge*



*_____________________________________________________________________________________________________________________________________________________
*
**# RESHAPING INTO LONG FORMAT
*_____________________________________________________________________________________________________________________________________________________

// Identifiers, named for consistency with SHARE
generate cid = "51"		// Country ID prefix, so that there are no duplicate IDs across countries
generate country = 51
generate survey = "HRS"
generate str11 mergeid = cid + strofreal(hhidpn, "%09.0f")
	label variable mergeid "Country ID + HHIDPN, to be unique across countries"
order mergeid, first

// Reshaping
// Wave-specific variables are listed as wildcard patterns in which * (or ?) stands in for the
// wave number. They are held in numbered chunks only to keep the lines readable; the chunks are
// joined in this order to form the keep list, and the same list (wildcards swapped for @) is
// handed to -reshape long-.
// NOTE(review): ? matches exactly one character, so the r?isret r?iunwc r?igxfr r?ipena h?itot
// and h?child patterns only pick up single-digit waves - confirm that leaving waves 10-12 out
// for these variables is deliberate.
*local keepvars "r*shlt r*hlthlm" 
local kv01 "r*walks r*walk1 r*jog r*sit r*chair r*clims r*clim1 r*stoop r*arms r*push r*lift r*dime"
local kv02 "r*dress r*walkr r*bath r*eat r*bed r*toilt r*map r*meals r*shop r*phone r*meds r*money"
local kv03 "r*hlthlm r*shlt r*imrc r*fimrc r*dlrc r*fdlrc"
local kv04 "r*depres r*effort r*sleepr r*whappy r*flone r*fsad r*going r*enlife r*cesd r*cesdm"
local kv05 "r*gs_dhand r*gs_lhand1 r*gs_lhand2 r*gs_rhand1 r*gs_rhand2 r*gs_effort r*gs_anytrial r*gs_reason1 r*gs_reason2 r*gs_reason3 r*gs_reason4 r*gs_reason5" // Grip strength
local kv06 "r*back r*hearte r*hibpe r*stroke r*diabe r*lunge r*arthre r*cancre r*hosp r*homcar r*doctim r*nrshom r*bmi" // Added for Boheim-Leoni collaboration
local kv07 "r*psyche" // Conditions added for OECD work. None of these exist: r*asthmae r*hchole  r*parkine  r*catrcte r*hipfeme
local kv08 "r*lbrf r*work r*jhours r*jhour2 r*jweeks r*jweek2 r*iearn r*ifearn r?isret r?iunwc r?igxfr r?ipena h?itot"
local kv09 "r*jlasty r*jlastm r*jcten" // Added for Boheim-Leoni collaboration
local kv10 "r*see_screen_HRS r*hefrnd r*hepap r*hehear" // Added for 2023 OECD report
local kv11 "r*dstat r*issdi r*ifssdi r*issi r*ifssi r*isdi r*ifsdi r*ifwcmp r*iwcmp"
local kv12 "r*lstat1 r*lstat2 r*lstat3 r*bennow_vet r*isemp r*nsdi r*nfsdi" // Added from fat files / income files by BBG - also at one point looked at r*anywork r*inlbrf r*hrswk1 r*wksyr1 r*anywork2 r*hrswk2 r*wksyr2
local kv13 "inw* r*wtresp r*proxy r*agey_e r*agem_e h?child r*mstat r*iwendy"
local keepvars "`kv01' `kv02' `kv03' `kv04' `kv05' `kv06' `kv07' `kv08' `kv09' `kv10' `kv11' `kv12' `kv13'"

// Variables that do not vary by wave
local crosswavevars	"ragender rabyear rabmonth raedyrs survey country"

keep mergeid cid `keepvars' `crosswavevars'

// Turn each wildcard into reshape's @ placeholder, which marks where the wave number sits
local reshapevars : subinstr local keepvars    "*" "@", all
local reshapevars : subinstr local reshapevars "?" "@", all
reshape long `reshapevars', i(mergeid) j(wave)


*_____________________________________________________________________________________________________________________________________________________
*
**# FURTHER CLEANING
*_____________________________________________________________________________________________________________________________________________________

// Number of doctors appointments [from Boheim/Leoni cleaning code]
// [see documentation, after W5 respondents could use a bracket to answer the question]
// Bracketed answers are stored as extended missing codes; each is replaced by one count:
//   .e (1-4 times)       ->  2
//   .j (0-5 times)       ->  2
//   .k (1-19 times)      -> 10
//   .f (6-19 times)      -> 12
//   .g (21-49 times)     -> 35
//   .l (21 or more)      -> 25
//   .h (51 or more)      -> 55
// Not included (as per Boheim-Leoni): .i indicates at least once
local bracket_codes  ".e .j .k .f .g .l .h"
local bracket_counts "2  2  10 12 35 25 55"
forvalues i = 1/7 {
	local code  : word `i' of `bracket_codes'
	local count : word `i' of `bracket_counts'
	replace rdoctim = `count' if rdoctim==`code'
}


// Back problems
// The question is only asked in alternate waves (for repeat interviewees). It does change over
// time, but carrying the previous answer forward is just about OK as an approximation.
// rback2 copies rback and, where rback is .a from wave 4 onwards, takes the same person's rback
// from the immediately preceding wave (only when that wave is exactly one earlier).
generate rback2 = rback
bysort mergeid (wave): replace rback2 = rback[_n-1] if rback2==.a & wave>=4 & wave==wave[_n-1]+1
	label values rback2 PREVENT
	label variable rback2 "CAUTION: back problems with carry-forward for repeat interviewees for alternate waves"


// Grip strength vars
local gs_trials  "rgs_lhand1 rgs_lhand2 rgs_rhand1 rgs_rhand2"
local gs_reasons "rgs_reason1 rgs_reason2 rgs_reason3 rgs_reason4 rgs_reason5"

* Best of the four trials (two per hand)
* NOTE(review): the maximum is taken before the 99x sentinel codes are removed, so a sentinel on
* one trial overrides valid readings on the others - confirm this is the intended treatment.
egen rmaxgrip = rowmax(`gs_trials')
	label variable rmaxgrip "Max grip strength in kg (of either hand, 2 measures each)"
	recode rmaxgrip 							///
		(999 9999 99999     = .r) 				/// if R chose not to do it
		(993 9993 99993     = .d) 				/// if R tried but was unable
		(996 998 9998 99998 = .o)				//  not labelled, but clearly another missing value!
	replace rmaxgrip = . if wave==7				// This is the 2004 data on a smaller subsample, which I don't need to use

* Reasons for having no measurement: 1/2 = not safe, 6 = surgery/injury
* Previously also used value 4 (tried but failed to complete test) for rgs_no_inj, but after
* checking it looks like this makes it incomparable to ELSA/SHARE
egen rgs_no_safe = anymatch(`gs_reasons'), values(1 2)
egen rgs_no_inj  = anymatch(`gs_reasons') if wave!=7, values(6)
* Blank both indicators when there is neither a first reason nor a grip value, and for all of wave 7
foreach flag in rgs_no_safe rgs_no_inj {
	replace `flag' = . if (rgs_reason1==. & missing(rmaxgrip)) | wave==7
}
	label variable rgs_no_safe "No GS measure: iviewer or resp thinks not safe"
	label variable rgs_no_inj "No GS measure: R had surgery/injury on both hands"
*egen rgs_no_oth = anymatch(rgs_reason?), values(3 5 7 8 97)
*	label var rgs_no_oth  "No GS measure: R refused or didnt understand instructions or other problem"

* Interviewer's effort rating: labels only, values untouched
label define rgs_effort 																								///
	1 "1_R gave full effort" 																							///
	2 "2_R was prevented from giving full effort by illness, pain, or other symptoms or discomforts" 					///
	3 "3_R did not appear to give full effort, but no obvious reason for this" 											///
	9 "9_no answer"
label values rgs_effort rgs_effort
label variable rgs_effort "IWER rating of how much effort R gave to test"

* Raw inputs are no longer needed
drop rgs_reason? rgs_?hand* rgs_anytrial


// Disability benefit vars
* BENNOW - whether claiming at the moment (from RDSTAT)
recode rdstat (2 12 20 21 22 200=1 "1_receives SSI/SSDI")(0 1 10 11 100=0 "0_doesnt receive SSI/SSDI"), gen(rbennow_incap)
	label var rbennow_incap "Current incapacity benefit receipt (SSI/SSDI)"
recode rdstat (20 21 22=1 "1_receives SSDI")(0 1 2 10 11 12 100 200=0 "0_doesnt receive SSDI (but may receive SSI"), gen(rbennow_ssdi)
	label var rbennow_ssdi "Current SSDI receipt (ignoring SSI)"
* Same as rbennow_incap, but also set to 1 where veterans' benefits were claimed all year (rbennow_vet==1)
clonevar rbennow_incap3 = rbennow_incap
	replace rbennow_incap3 = 1 if rbennow_vet==1
	label var rbennow_incap3 "Current veterans bens or incapacity benefit receipt (SSI/SSDI)"

* BENINC - whether gets income from claiming (from RSSDI)
* Two versions are built from the same income variable (rissdi):
*   rbeninc3_dis  keeps imputed cases and flags them with code 2
*   rbeninc2_dis  sets imputed cases to .m
label define benimpute 0 "0_no income in last year" 1 "1_bens received by SR" 2 "2_bens received by imputation", modify
	recode rissdi (0=0)(0.1/max=1), gen(rbeninc3_dis)				// RAND version with imputation flag
	replace rbeninc3_dis = 2 if rifssdi>1 & rbeninc3_dis==1
label define benstat 0 "0_no income in last year" 1 "1_DI for ALL of last year"  2 "2_DI for PART of last year", modify
	recode rissdi (0=0)(0.1/max=1), gen(rbeninc2_dis)				// My version ignoring imputations
	* FIX: this used to overwrite rbeninc3_dis, which wiped the imputation code 2 set above and
	* left the imputed cases inside rbeninc2_dis. The .m belongs on the "ignoring imputations" version.
	replace rbeninc2_dis = .m if rifssdi>1 & ~missing(rifssdi)
	clonevar rbenstat2_dis = rbeninc2_dis
	* FIX: parentheses added. & binds more tightly than |, so the unbracketed condition recoded
	* everyone with rnfsdi==1 to "part of last year", including people with no income or a missing status.
	* Now only current recipients (==1) are moved to 2, when months<12 or the months flag is set.
	replace rbenstat2_dis = 2 if rbenstat2_dis==1 & (rnsdi<12 | rnfsdi==1)

* WORKERS COMPENSATION (akin to IIDB)
recode riwcmp (0=0)(0.1/max=1), gen(rbeninc3_wc)
	replace rbeninc3_wc = 2 if rifwcmp>1 & rbeninc3_wc==1
	label var rbeninc3_wc "HRS: inc from Workers' Compensation in past 12mth"

* Tidying
label values rbenstat* 	benstat
label values rbeninc3*	benimpute
drop rdstat rnsdi rnfsdi


// Final cleaning
* Alphabetical variable order is fixed first; the renames below keep the position of the old name
order _all, sequential

* Variables that were only carried along by the wildcard keep list:
*   *inlbrf  - not to be confused with the RLBRF var (RAND file), which I use
*   *anywork - not to be confused with the RWORK var (RAND file), which I use
drop *jstoop *jlift *inlbrf *anywork

* RAND codebook notes: "In Wave 7, if re-interviewees have previously reported a health limitation,
* these questions are skipped and assigned ".Y=Assumed Yes."  -> blank the wave 7 values
replace rhlthlm = . if wave==7

* Keep wave 7 onwards: f2f became the default mode at w7, making it more comparable to
* SHARE/ELSA (esp wrt proxies et al)
drop if wave<7

rename (rhlthlm riwendy) (rwld riwy)

* SR emp stat: don't know / refused are coded 8/9 before wave 11 and 98/99 from wave 11
local lstatvars "rlstat1 rlstat2 rlstat3"
recode `lstatvars' (8=.d)  (9=.r)  if wave<=10
recode `lstatvars' (98=.d) (99=.r) if wave>10
label define srlstat 							///
	1 "1_working now" 							///
	2 "2_unemp" 								///
	3 "3_temporarily away" 						///
	4 "4_disabled" 								///
	5 "5_retired" 								///
	6 "6_homemaker" 							///
	7 "7_other" 								///
	8 "8_(vol) sick/other leave from w11"
label values `lstatvars' srlstat


// Shrink storage types and write the versioned output file
compress
save "${hrsdir}\HRS_BBG_${versno}.dta", replace
/*
use "${hrsdir}\HRS_BBG_${versno}.dta", replace
*/